


rm(list=ls())
setwd("~/Dropbox/Projects/PoliticsOfScience/Committes_Science/ScienceReplication")

tidy.feglm <- function(x, ...) {
  s <- summary(x,  type = "sandwich")
  ret <- data.frame(
    term      = row.names(s$cm),
    estimate  = s$cm[, 1],
    std.error  = s$cm[, 2]
  )
  ret
}

glance.feglm <- function(x, ...) {
  ret <- data.frame(
    Null.Dev = x$null.deviance,
    Deviance = x$deviance,
    N   = x$nobs[4],
    `Committee FE` = as.integer(length(x$nms.fe[[1]])),
    `Congress FE` = as.integer(length(x$nms.fe[[2]])))
  ret
}
library(tidyverse)
library(lme4)
library(lubridate)


policy <- read_csv("Data/USAPolicyDocumentsForAggFiltered.csv")
unique(policy$policy_document_id) %>%length()



source_first_year <- policy %>% group_by(policy_source_id) %>% summarise(first_year = min(pub_year))

policy <- policy %>% filter(policy_source_type != "other")

policy <- policy %>% left_join(source_first_year)

policy %>% filter(overton_policy_document_series == "Publication", first_year <1998) %>% pull(policy_source_id) %>% unique() %>% length()

policy %>% filter(overton_policy_document_series == "Publication", first_year <1998, policy_source_type == "government") %>% pull(policy_source_id) %>% unique() %>% length()

policy %>% filter(overton_policy_document_series == "Publication", first_year <1998, policy_source_type == "think tank") %>% pull(policy_source_id) %>% unique() %>% length()


p_count_all <- policy %>% filter(, first_year <1998, overton_policy_document_series == "Publication", pub_year >2000) %>% ggplot(aes(x=pub_year_char, y = cites_sci)) +  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size=2, alpha=.65) + 
  stat_summary(fun.y = mean, geom = "point", size=3) + theme_minimal() + xlab("Year") + ylab("Percent of Publications Citing Science") + scale_x_discrete(breaks = c(1996,2000,2004,2008,2012, 2016, 2020)) +ggtitle("All policy documents")


p_count_tt <- policy %>% filter(, first_year <1998, pub_year >2000,policy_source_type == "think tank", overton_policy_document_series == "Publication") %>% ggplot(aes(x=pub_year_char, y = cites_sci)) +  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size=2, alpha=.65) + 
  stat_summary(fun.y = mean, geom = "point", size=3) + theme_minimal() + xlab("Year") + ylab("Percent of Publications Citing Science")+ scale_x_discrete(breaks = c(1996,2000,2004,2008,2012, 2016, 2020)) +ggtitle("Think tank policy documents")

p_count_gov <- policy %>% filter(, first_year <1998, pub_year >2000,policy_source_type == "government", overton_policy_document_series == "Publication") %>% ggplot(aes(x=pub_year_char, y = cites_sci)) +  stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0, size=2, alpha=.65) + 
  stat_summary(fun.y = mean, geom = "point", size=3) + theme_minimal() + xlab("Year") + ylab("Percent of Publications Citing Science")+ scale_x_discrete(breaks = c(1996,2000,2004,2008,2012, 2016, 2020)) +ggtitle("Government policy documents")

library(cowplot)

p_out <-cowplot::plot_grid(p_count_all, p_count_gov, p_count_tt, nrow = 1, labels = c("a", "b", "c"))

ggsave("Output/FigS3.pdf", p_out, device = "pdf", width = 11, height = 4, units = "in")


##### Describe Overton Data for SI 1

#number of sources, documents
policy$policy_source_id %>% unique() %>% length()
policy$policy_document_id %>% unique() %>% length()


policy$policy_source_id[policy$policy_source_type == "government"] %>% unique()

policy$policy_source_id[policy$policy_source_type == "think tank"] %>% unique() 

policy %>% dplyr::select(policy_source_title, policy_source_type) %>% unique() %>% write_csv("Output/OvertonUSAPolicySourceTable.csv")

policy %>% filter(policy_source_type == "think tank") %>% dplyr::select(policy_source_title, policy_source_type) %>% unique() %>% write_csv("OvertonUSAPolicyThinkTankSourceTable.csv")



sum_vars <- policy %>% group_by(policy_source_id) %>% summarise(first_year = min(first_year),
                                                    num_years = length(unique(pub_year_char)),
                                                    num_docs = length(unique(policy_document_id)),
                                                    cites_science_share = mean(cites_sci, na.rm = T),
                                                    cites_hitscience_share = mean(cites_hitsci, na.rm = T),
                                                    cites_nonhitscience_share = mean(cites_nonhitsci, na.rm = T),
                                                    
)
library(vtable)
st(sum_vars, out = "csv") %>% write_csv("Output/TableS2.csv")


table(policy$overton_policy_document_series) %>% as.data.frame() %>% write_csv("Output/TableS3.csv")

library(MASS)
sum_vars %>% ggplot(aes(y=cites_science_share, x=first_year)) + geom_jitter() +geom_smooth(method ="rlm") + theme_minimal() +
  ylab("Percent of Documents Citing Science") + xlab("First Year in Data") -> firstyearscatter

ggsave("Output/FirstYearScatterFiltered.pdf", firstyearscatter, device="pdf", width = 6, height = 6)

library(fixest)
fy<-feols(cites_sci~ first_year + overton_policy_document_series +policy_source_type | pub_year, policy) 

library(modelsummary)
modelsummary(fy, output = "Output/TableS4.docx")
library(ggplot2)

policy %>% group_by(policy_source_type, pub_year) %>% summarise(num = length(unique(policy_document_id))) %>%
  ggplot(aes(x=pub_year, y= num, linetype = policy_source_type)) + geom_line() + theme_minimal() +
  xlab("Publication Year") + ylab("Number of Documents") + scale_linetype("") -> docnum

ggsave("Output/FigS1A.pdf", docnum, device = "pdf", width = 6, height = 6)


library(sjPlot)
library(sjmisc)


library(broom.mixed)
policy$pub_year_char <- as.character(policy$pub_year_char)
megatrend <- glmer(cites_sci~ pub_year_char + overton_policy_document_series + first_year + policy_source_type +  early_committee + (1|policy_source_id),
                   family = binomial(link="logit"),
                   data =  as.data.frame(filter(policy,pub_year > 1994)), 
                   nAGQ = 0)



library(ggeffects)

library(marginaleffects)

emms <- ggemmeans(megatrend, terms = c("pub_year_char"), 
                  ci_level = 0.85, type = "fe", typical = "mean", 
                  condition = c(overton_policy_document_series = "Publication"))

emms <- as_tibble(emms) %>% mutate(year = as.numeric(as.character(x)))

overall_citation_trend <- emms %>% ggplot(aes(x=year, y=predicted, ymin=conf.low, ymax = conf.high, group =group)) +
  geom_line(size=1) +geom_ribbon(alpha = .2, color = NA) + theme_minimal() +ylab("Predicted Percent of Publications Citing Science") +
  xlab("") + ylim(0,.5)

ggsave("Output/Fig1A.pdf", overall_citation_trend, device = "pdf", width = 4, height = 4, units = "in")


source_type_regs <- policy %>% split(.$policy_source_type) %>%
  map(~glmer(cites_sci ~ pub_year_char + overton_policy_document_series + first_year + (1|policy_source_id),
             family = binomial(link="logit"),
             data =  as.data.frame(filter(.x, pub_year > 1994)), 
             nAGQ = 0))

source_type_regs_out <- source_type_regs %>%
  map(~ggemmeans(.x, terms = c("pub_year_char"), 
                 ci.lvl = 0.85, type = "fe", typical = "mean", 
                 condition = c(overton_policy_document_series = "Publication"))) %>%
  map(~as_tibble(.x)) %>%
  bind_rows( .id = "id")

source_citation_trend <- source_type_regs_out %>% ggplot(aes(x=as.numeric(as.character(x)), y=predicted, ymin=conf.low, ymax = conf.high, group =id)) +
  geom_line(size=1) +geom_ribbon(alpha = .2, color = NA) + theme_minimal() +ylab("Predicted Percent of Publications Citing Science") +
  facet_wrap(~id, scales = "free", ncol = 2) + xlab("") + ylim(0,.5)

ggsave("Output/FigS2.pdf", source_citation_trend, device = "pdf", width = 6, height = 3, units = "in")

modelsummary(list("All" = megatrend, "Government" = source_type_regs$government, "Think Tank" = source_type_regs$`think tank`), output = "TableS7.docx")




